{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "machine_shape": "hm",
      "gpuType": "L4",
      "authorship_tag": "ABX9TyOkGQh7maXiQhQ6pYoY2NaU",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/louisoutin/dots.ocr/blob/master/dots_ocr.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# DotsOCR vLLM Openai API Compatible server"
      ],
      "metadata": {
        "id": "PshK9ZarVTfM"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install pyngrok\n",
        "!ngrok authtoken  # Get this from https://dashboard.ngrok.com/"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "oyY3E3mlOXNX",
        "outputId": "8d7ba92f-7170-4b2e-e8a0-c7f94096f7e0"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: pyngrok in /usr/local/lib/python3.11/dist-packages (7.3.0)\n",
            "Requirement already satisfied: PyYAML>=5.1 in /usr/local/lib/python3.11/dist-packages (from pyngrok) (6.0.2)\n",
            "Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!conda create -n dots_ocr python=3.12\n",
        "!conda activate dots_ocr\n",
        "\n",
        "!git clone https://github.com/rednote-hilab/dots.ocr.git"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "BcV7hkvuRnwS",
        "outputId": "7cb9c743-6f41-4c90-a05b-90bce2c29ced"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "/bin/bash: line 1: conda: command not found\n",
            "/bin/bash: line 1: conda: command not found\n",
            "Cloning into 'dots.ocr'...\n",
            "remote: Enumerating objects: 163, done.\u001b[K\n",
            "remote: Counting objects: 100% (51/51), done.\u001b[K\n",
            "remote: Compressing objects: 100% (31/31), done.\u001b[K\n",
            "remote: Total 163 (delta 30), reused 30 (delta 20), pack-reused 112 (from 1)\u001b[K\n",
            "Receiving objects: 100% (163/163), 35.82 MiB | 13.64 MiB/s, done.\n",
            "Resolving deltas: 100% (56/56), done.\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "cd /content/dots.ocr"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Rsc_MkGfRpit",
        "outputId": "5265315f-c27c-4346-cda7-aba7d4c226d6"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "/content/dots.ocr\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Install pytorch, see https://pytorch.org/get-started/previous-versions/ for your cuda version\n",
        "!pip install torch==2.7.0 torchvision==0.22.0 torchaudio==2.7.0 --index-url https://download.pytorch.org/whl/cu128\n",
        "!pip install -e ."
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "OxLaSyTJPFwk",
        "outputId": "a073dcdd-5e5d-4f62-d3b9-be9e9cf98d2f"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Looking in indexes: https://download.pytorch.org/whl/cu128\n",
            "Requirement already satisfied: torch==2.7.0 in /usr/local/lib/python3.11/dist-packages (2.7.0+cu128)\n",
            "Requirement already satisfied: torchvision==0.22.0 in /usr/local/lib/python3.11/dist-packages (0.22.0+cu128)\n",
            "Requirement already satisfied: torchaudio==2.7.0 in /usr/local/lib/python3.11/dist-packages (2.7.0+cu128)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (3.18.0)\n",
            "Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (4.14.1)\n",
            "Requirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (1.13.3)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (3.5)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (3.1.6)\n",
            "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (2025.3.0)\n",
            "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.8.61 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (12.8.61)\n",
            "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.8.57 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (12.8.57)\n",
            "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.8.57 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (12.8.57)\n",
            "Requirement already satisfied: nvidia-cudnn-cu12==9.7.1.26 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (9.7.1.26)\n",
            "Requirement already satisfied: nvidia-cublas-cu12==12.8.3.14 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (12.8.3.14)\n",
            "Requirement already satisfied: nvidia-cufft-cu12==11.3.3.41 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (11.3.3.41)\n",
            "Requirement already satisfied: nvidia-curand-cu12==10.3.9.55 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (10.3.9.55)\n",
            "Requirement already satisfied: nvidia-cusolver-cu12==11.7.2.55 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (11.7.2.55)\n",
            "Requirement already satisfied: nvidia-cusparse-cu12==12.5.7.53 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (12.5.7.53)\n",
            "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.3 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (0.6.3)\n",
            "Requirement already satisfied: nvidia-nccl-cu12==2.26.2 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (2.26.2)\n",
            "Requirement already satisfied: nvidia-nvtx-cu12==12.8.55 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (12.8.55)\n",
            "Requirement already satisfied: nvidia-nvjitlink-cu12==12.8.61 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (12.8.61)\n",
            "Requirement already satisfied: nvidia-cufile-cu12==1.13.0.11 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (1.13.0.11)\n",
            "Requirement already satisfied: triton==3.3.0 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (3.3.0)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from torchvision==0.22.0) (2.0.2)\n",
            "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.11/dist-packages (from torchvision==0.22.0) (11.3.0)\n",
            "Requirement already satisfied: setuptools>=40.8.0 in /usr/local/lib/python3.11/dist-packages (from triton==3.3.0->torch==2.7.0) (75.2.0)\n",
            "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy>=1.13.3->torch==2.7.0) (1.3.0)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch==2.7.0) (3.0.2)\n",
            "Obtaining file:///content/dots.ocr\n",
            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: gradio in /usr/local/lib/python3.11/dist-packages (from dots_ocr==1.0) (5.39.0)\n",
            "Collecting gradio_image_annotation (from dots_ocr==1.0)\n",
            "  Downloading gradio_image_annotation-0.4.0-py3-none-any.whl.metadata (17 kB)\n",
            "Collecting PyMuPDF (from dots_ocr==1.0)\n",
            "  Downloading pymupdf-1.26.3-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (3.4 kB)\n",
            "Requirement already satisfied: openai in /usr/local/lib/python3.11/dist-packages (from dots_ocr==1.0) (1.98.0)\n",
            "Collecting qwen_vl_utils (from dots_ocr==1.0)\n",
            "  Downloading qwen_vl_utils-0.0.11-py3-none-any.whl.metadata (6.3 kB)\n",
            "Collecting transformers==4.51.3 (from dots_ocr==1.0)\n",
            "  Downloading transformers-4.51.3-py3-none-any.whl.metadata (38 kB)\n",
            "Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.11/dist-packages (from dots_ocr==1.0) (0.34.3)\n",
            "Collecting modelscope (from dots_ocr==1.0)\n",
            "  Downloading modelscope-1.28.2-py3-none-any.whl.metadata (39 kB)\n",
            "Collecting flash-attn==2.8.0.post2 (from dots_ocr==1.0)\n",
            "  Downloading flash_attn-2.8.0.post2.tar.gz (7.9 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.9/7.9 MB\u001b[0m \u001b[31m124.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: accelerate in /usr/local/lib/python3.11/dist-packages (from dots_ocr==1.0) (1.9.0)\n",
            "Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (from flash-attn==2.8.0.post2->dots_ocr==1.0) (2.7.0+cu128)\n",
            "Requirement already satisfied: einops in /usr/local/lib/python3.11/dist-packages (from flash-attn==2.8.0.post2->dots_ocr==1.0) (0.8.1)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from transformers==4.51.3->dots_ocr==1.0) (3.18.0)\n",
            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from transformers==4.51.3->dots_ocr==1.0) (2.0.2)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from transformers==4.51.3->dots_ocr==1.0) (25.0)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from transformers==4.51.3->dots_ocr==1.0) (6.0.2)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers==4.51.3->dots_ocr==1.0) (2024.11.6)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from transformers==4.51.3->dots_ocr==1.0) (2.32.3)\n",
            "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers==4.51.3->dots_ocr==1.0) (0.21.4)\n",
            "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from transformers==4.51.3->dots_ocr==1.0) (0.5.3)\n",
            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.11/dist-packages (from transformers==4.51.3->dots_ocr==1.0) (4.67.1)\n",
            "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub->dots_ocr==1.0) (2025.3.0)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub->dots_ocr==1.0) (4.14.1)\n",
            "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub->dots_ocr==1.0) (1.1.5)\n",
            "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from accelerate->dots_ocr==1.0) (5.9.5)\n",
            "Requirement already satisfied: aiofiles<25.0,>=22.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (24.1.0)\n",
            "Requirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (4.10.0)\n",
            "Requirement already satisfied: brotli>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (1.1.0)\n",
            "Requirement already satisfied: fastapi<1.0,>=0.115.2 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.116.1)\n",
            "Requirement already satisfied: ffmpy in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.6.1)\n",
            "Requirement already satisfied: gradio-client==1.11.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (1.11.0)\n",
            "Requirement already satisfied: groovy~=0.1 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.1.2)\n",
            "Requirement already satisfied: httpx<1.0,>=0.24.1 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.28.1)\n",
            "Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (3.1.6)\n",
            "Requirement already satisfied: markupsafe<4.0,>=2.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (3.0.2)\n",
            "Requirement already satisfied: orjson~=3.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (3.11.1)\n",
            "Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (2.2.2)\n",
            "Requirement already satisfied: pillow<12.0,>=8.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (11.3.0)\n",
            "Requirement already satisfied: pydantic<2.12,>=2.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (2.11.7)\n",
            "Requirement already satisfied: pydub in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.25.1)\n",
            "Requirement already satisfied: python-multipart>=0.0.18 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.0.20)\n",
            "Requirement already satisfied: ruff>=0.9.3 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.12.7)\n",
            "Requirement already satisfied: safehttpx<0.2.0,>=0.1.6 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.1.6)\n",
            "Requirement already satisfied: semantic-version~=2.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (2.10.0)\n",
            "Requirement already satisfied: starlette<1.0,>=0.40.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.47.2)\n",
            "Requirement already satisfied: tomlkit<0.14.0,>=0.12.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.13.3)\n",
            "Requirement already satisfied: typer<1.0,>=0.12 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.16.0)\n",
            "Requirement already satisfied: uvicorn>=0.14.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.35.0)\n",
            "Requirement already satisfied: websockets<16.0,>=10.0 in /usr/local/lib/python3.11/dist-packages (from gradio-client==1.11.0->gradio->dots_ocr==1.0) (15.0.1)\n",
            "Requirement already satisfied: setuptools in /usr/local/lib/python3.11/dist-packages (from modelscope->dots_ocr==1.0) (75.2.0)\n",
            "Requirement already satisfied: urllib3>=1.26 in /usr/local/lib/python3.11/dist-packages (from modelscope->dots_ocr==1.0) (2.5.0)\n",
            "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.11/dist-packages (from openai->dots_ocr==1.0) (1.9.0)\n",
            "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from openai->dots_ocr==1.0) (0.10.0)\n",
            "Requirement already satisfied: sniffio in /usr/local/lib/python3.11/dist-packages (from openai->dots_ocr==1.0) (1.3.1)\n",
            "Collecting av (from qwen_vl_utils->dots_ocr==1.0)\n",
            "  Downloading av-15.0.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.6 kB)\n",
            "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.11/dist-packages (from anyio<5.0,>=3.0->gradio->dots_ocr==1.0) (3.10)\n",
            "Requirement already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from httpx<1.0,>=0.24.1->gradio->dots_ocr==1.0) (2025.8.3)\n",
            "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx<1.0,>=0.24.1->gradio->dots_ocr==1.0) (1.0.9)\n",
            "Requirement already satisfied: h11>=0.16 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx<1.0,>=0.24.1->gradio->dots_ocr==1.0) (0.16.0)\n",
            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio->dots_ocr==1.0) (2.9.0.post0)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio->dots_ocr==1.0) (2025.2)\n",
            "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio->dots_ocr==1.0) (2025.2)\n",
            "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<2.12,>=2.0->gradio->dots_ocr==1.0) (0.7.0)\n",
            "Requirement already satisfied: pydantic-core==2.33.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<2.12,>=2.0->gradio->dots_ocr==1.0) (2.33.2)\n",
            "Requirement already satisfied: typing-inspection>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<2.12,>=2.0->gradio->dots_ocr==1.0) (0.4.1)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->transformers==4.51.3->dots_ocr==1.0) (3.4.2)\n",
            "Requirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (1.13.3)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (3.5)\n",
            "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.8.61 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (12.8.61)\n",
            "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.8.57 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (12.8.57)\n",
            "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.8.57 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (12.8.57)\n",
            "Requirement already satisfied: nvidia-cudnn-cu12==9.7.1.26 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (9.7.1.26)\n",
            "Requirement already satisfied: nvidia-cublas-cu12==12.8.3.14 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (12.8.3.14)\n",
            "Requirement already satisfied: nvidia-cufft-cu12==11.3.3.41 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (11.3.3.41)\n",
            "Requirement already satisfied: nvidia-curand-cu12==10.3.9.55 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (10.3.9.55)\n",
            "Requirement already satisfied: nvidia-cusolver-cu12==11.7.2.55 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (11.7.2.55)\n",
            "Requirement already satisfied: nvidia-cusparse-cu12==12.5.7.53 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (12.5.7.53)\n",
            "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.3 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (0.6.3)\n",
            "Requirement already satisfied: nvidia-nccl-cu12==2.26.2 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (2.26.2)\n",
            "Requirement already satisfied: nvidia-nvtx-cu12==12.8.55 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (12.8.55)\n",
            "Requirement already satisfied: nvidia-nvjitlink-cu12==12.8.61 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (12.8.61)\n",
            "Requirement already satisfied: nvidia-cufile-cu12==1.13.0.11 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (1.13.0.11)\n",
            "Requirement already satisfied: triton==3.3.0 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (3.3.0)\n",
            "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio->dots_ocr==1.0) (8.2.1)\n",
            "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio->dots_ocr==1.0) (1.5.4)\n",
            "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio->dots_ocr==1.0) (13.9.4)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3.0,>=1.0->gradio->dots_ocr==1.0) (1.17.0)\n",
            "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio->dots_ocr==1.0) (3.0.0)\n",
            "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio->dots_ocr==1.0) (2.19.2)\n",
            "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy>=1.13.3->torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (1.3.0)\n",
            "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio->dots_ocr==1.0) (0.1.2)\n",
            "Downloading transformers-4.51.3-py3-none-any.whl (10.4 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.4/10.4 MB\u001b[0m \u001b[31m132.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading gradio_image_annotation-0.4.0-py3-none-any.whl (91 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m91.5/91.5 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading modelscope-1.28.2-py3-none-any.whl (5.9 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m129.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading pymupdf-1.26.3-cp39-abi3-manylinux_2_28_x86_64.whl (24.1 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.1/24.1 MB\u001b[0m \u001b[31m101.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading qwen_vl_utils-0.0.11-py3-none-any.whl (7.6 kB)\n",
            "Downloading av-15.0.0-cp311-cp311-manylinux_2_28_x86_64.whl (39.7 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m39.7/39.7 MB\u001b[0m \u001b[31m61.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hBuilding wheels for collected packages: flash-attn\n",
            "  Building wheel for flash-attn (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for flash-attn: filename=flash_attn-2.8.0.post2-cp311-cp311-linux_x86_64.whl size=255941661 sha256=8ed71ac092f80b079d2e6043b769135904d6e834916cb6da7d372b394581447b\n",
            "  Stored in directory: /root/.cache/pip/wheels/a2/75/55/57ba1e272fd7fa1a01d9ba6b5334b7adaabf79900ede22c040\n",
            "Successfully built flash-attn\n",
            "Installing collected packages: PyMuPDF, av, qwen_vl_utils, modelscope, transformers, flash-attn, gradio_image_annotation, dots_ocr\n",
            "  Attempting uninstall: transformers\n",
            "    Found existing installation: transformers 4.54.1\n",
            "    Uninstalling transformers-4.54.1:\n",
            "      Successfully uninstalled transformers-4.54.1\n",
            "  Running setup.py develop for dots_ocr\n",
            "Successfully installed PyMuPDF-1.26.3 av-15.0.0 dots_ocr-1.0 flash-attn-2.8.0.post2 gradio_image_annotation-0.4.0 modelscope-1.28.2 qwen_vl_utils-0.0.11 transformers-4.51.3\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!python3 tools/download_model.py"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "z0nKSOYsRaA2",
        "outputId": "e4d67ed5-0cb9-437a-abec-5514f7bb8ccc"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Attention: The model save dir dots.ocr should be replace by a name without `.` like DotsOCR, util we merge our code to transformers.\n",
            "/usr/local/lib/python3.11/dist-packages/huggingface_hub/file_download.py:945: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
            "  warnings.warn(\n",
            "/usr/local/lib/python3.11/dist-packages/huggingface_hub/file_download.py:982: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`.\n",
            "For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.\n",
            "  warnings.warn(\n",
            "Fetching 19 files:   0% 0/19 [00:00<?, ?it/s]\n",
            "chat_template.json: 1.11kB [00:00, 2.48MB/s]\n",
            "\n",
            "generation_config.json: 100% 74.0/74.0 [00:00<00:00, 727kB/s]\n",
            "\n",
            "configuration_dots.py: 2.93kB [00:00, 15.1MB/s]\n",
            "\n",
            ".gitattributes: 1.52kB [00:00, 8.96MB/s]\n",
            "Fetching 19 files:   5% 1/19 [00:00<00:05,  3.39it/s]\n",
            "config.json: 1.47kB [00:00, 9.48MB/s]\n",
            "\n",
            "README.md: 31.1kB [00:00, 76.2MB/s]\n",
            "\n",
            "NOTICE: 118kB [00:00, 148MB/s]\n",
            "\n",
            "model-00002-of-00002.safetensors:   0% 0.00/1.79G [00:00<?, ?B/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:   0% 0.00/4.29G [00:00<?, ?B/s]\u001b[A\u001b[A\n",
            "\n",
            "\n",
            "model.safetensors.index.json: 52.2kB [00:00, 153MB/s]\n",
            "\n",
            "\n",
            "\n",
            "modeling_dots_ocr_vllm.py: 17.5kB [00:00, 53.8MB/s]\n",
            "\n",
            "\n",
            "\n",
            "modeling_dots_ocr.py: 4.98kB [00:00, 17.7MB/s]\n",
            "\n",
            "\n",
            "\n",
            "modeling_dots_vision.py: 14.9kB [00:00, 44.7MB/s]\n",
            "\n",
            "\n",
            "model-00001-of-00002.safetensors:   0% 21.0M/4.29G [00:00<00:21, 202MB/s]\u001b[A\u001b[A\n",
            "\n",
            "\n",
            "preprocessor_config.json: 100% 347/347 [00:00<00:00, 2.95MB/s]\n",
            "\n",
            "model-00002-of-00002.safetensors:   1% 21.0M/1.79G [00:00<00:10, 175MB/s]\u001b[A\n",
            "\n",
            "\n",
            "merges.txt: 0.00B [00:00, ?B/s]\u001b[A\u001b[A\u001b[A\n",
            "\n",
            "merges.txt: 1.67MB [00:00, 71.2MB/s]\n",
            "Fetching 19 files:  42% 8/19 [00:00<00:00, 12.00it/s]\n",
            "model-00002-of-00002.safetensors:   3% 52.4M/1.79G [00:00<00:07, 227MB/s]\u001b[A\n",
            "\n",
            "\n",
            "special_tokens_map.json: 100% 494/494 [00:00<00:00, 2.97MB/s]\n",
            "\n",
            "\n",
            "\n",
            "tokenizer.json: 0.00B [00:00, ?B/s]\u001b[A\u001b[A\u001b[A\n",
            "\n",
            "\n",
            "\n",
            "tokenizer_config.json: 9.31kB [00:00, 33.9MB/s]\n",
            "\n",
            "\n",
            "model-00001-of-00002.safetensors:   2% 105M/4.29G [00:00<00:11, 350MB/s] \u001b[A\u001b[A\n",
            "\n",
            "\n",
            "\n",
            "tokenizer.json: 7.04MB [00:00, 133MB/s]\n",
            "vocab.json: 2.78MB [00:00, 102MB/s]\n",
            "\n",
            "model-00002-of-00002.safetensors:   5% 94.4M/1.79G [00:00<00:05, 292MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:   4% 157M/4.29G [00:00<00:10, 413MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:   9% 157M/1.79G [00:00<00:04, 389MB/s] \u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:   5% 220M/4.29G [00:00<00:08, 461MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  12% 210M/1.79G [00:00<00:03, 432MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:   6% 273M/4.29G [00:00<00:08, 479MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  15% 273M/1.79G [00:00<00:03, 466MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:   8% 325M/4.29G [00:00<00:08, 489MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  18% 325M/1.79G [00:00<00:03, 477MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:   9% 377M/4.29G [00:00<00:07, 496MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  21% 377M/1.79G [00:00<00:02, 484MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  10% 430M/4.29G [00:00<00:07, 484MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  24% 430M/1.79G [00:01<00:02, 473MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  11% 482M/4.29G [00:01<00:08, 469MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  27% 482M/1.79G [00:01<00:02, 464MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  12% 535M/4.29G [00:01<00:08, 444MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  30% 535M/1.79G [00:01<00:02, 469MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  14% 587M/4.29G [00:01<00:08, 439MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  33% 587M/1.79G [00:01<00:02, 462MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  15% 640M/4.29G [00:01<00:08, 437MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  36% 640M/1.79G [00:01<00:02, 459MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  16% 692M/4.29G [00:03<00:50, 71.2MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  39% 692M/1.79G [00:03<00:15, 72.2MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  17% 744M/4.29G [00:03<00:37, 95.4MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  42% 744M/1.79G [00:03<00:10, 96.8MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  19% 797M/4.29G [00:03<00:27, 125MB/s] \u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  45% 797M/1.79G [00:03<00:07, 127MB/s] \u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  20% 849M/4.29G [00:03<00:21, 159MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  48% 849M/1.79G [00:03<00:05, 161MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  21% 891M/4.29G [00:04<00:17, 189MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  51% 902M/1.79G [00:04<00:04, 201MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  22% 944M/4.29G [00:04<00:14, 229MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  53% 954M/1.79G [00:04<00:03, 242MB/s]\u001b[A\n",
            "model-00002-of-00002.safetensors:  56% 1.01G/1.79G [00:04<00:02, 288MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  23% 996M/4.29G [00:04<00:12, 269MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  59% 1.06G/1.79G [00:04<00:02, 318MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  24% 1.05G/4.29G [00:04<00:10, 304MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  62% 1.11G/1.79G [00:04<00:01, 343MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  26% 1.10G/4.29G [00:04<00:09, 327MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  65% 1.16G/1.79G [00:04<00:01, 364MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  27% 1.15G/4.29G [00:04<00:08, 353MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  68% 1.22G/1.79G [00:04<00:01, 387MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  28% 1.21G/4.29G [00:04<00:08, 370MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  72% 1.28G/1.79G [00:04<00:01, 425MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  29% 1.26G/4.29G [00:04<00:08, 355MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  75% 1.33G/1.79G [00:05<00:01, 348MB/s]\u001b[A\n",
            "model-00002-of-00002.safetensors:  78% 1.39G/1.79G [00:05<00:00, 400MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  30% 1.30G/4.29G [00:05<00:12, 240MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  81% 1.45G/1.79G [00:05<00:00, 415MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  31% 1.34G/4.29G [00:05<00:11, 264MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  84% 1.50G/1.79G [00:07<00:04, 68.5MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  32% 1.38G/4.29G [00:07<00:52, 55.7MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  87% 1.55G/1.79G [00:07<00:02, 91.2MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  33% 1.44G/4.29G [00:07<00:36, 78.6MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  90% 1.60G/1.79G [00:07<00:01, 120MB/s] \u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  35% 1.49G/4.29G [00:07<00:26, 106MB/s] \u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  93% 1.66G/1.79G [00:08<00:00, 154MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  36% 1.54G/4.29G [00:08<00:19, 139MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  96% 1.71G/1.79G [00:08<00:00, 194MB/s]\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  37% 1.58G/4.29G [00:08<00:16, 169MB/s]\u001b[A\u001b[A\n",
            "model-00002-of-00002.safetensors:  99% 1.76G/1.79G [00:08<00:00, 233MB/s]\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors: 100% 1.79G/1.79G [00:08<00:00, 215MB/s]\n",
            "\n",
            "\n",
            "model-00001-of-00002.safetensors:  39% 1.68G/4.29G [00:08<00:10, 247MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  40% 1.73G/4.29G [00:08<00:08, 294MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  42% 1.78G/4.29G [00:08<00:07, 331MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  43% 1.84G/4.29G [00:08<00:06, 361MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  44% 1.89G/4.29G [00:08<00:06, 385MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  45% 1.94G/4.29G [00:08<00:05, 419MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  46% 1.99G/4.29G [00:09<00:05, 445MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  48% 2.04G/4.29G [00:09<00:04, 466MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  49% 2.10G/4.29G [00:09<00:04, 478MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  50% 2.15G/4.29G [00:09<00:04, 490MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  52% 2.21G/4.29G [00:09<00:04, 508MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  53% 2.28G/4.29G [00:09<00:03, 513MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  54% 2.34G/4.29G [00:09<00:03, 518MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  56% 2.40G/4.29G [00:09<00:03, 531MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  57% 2.46G/4.29G [00:09<00:03, 554MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  59% 2.53G/4.29G [00:10<00:03, 573MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  60% 2.59G/4.29G [00:10<00:02, 585MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  62% 2.65G/4.29G [00:10<00:02, 596MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  63% 2.72G/4.29G [00:10<00:04, 365MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  65% 2.78G/4.29G [00:10<00:03, 404MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  66% 2.84G/4.29G [00:10<00:03, 439MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  68% 2.90G/4.29G [00:10<00:02, 468MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  69% 2.97G/4.29G [00:11<00:02, 489MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  71% 3.03G/4.29G [00:11<00:03, 406MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  72% 3.08G/4.29G [00:11<00:03, 350MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  73% 3.12G/4.29G [00:11<00:03, 327MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  74% 3.17G/4.29G [00:11<00:03, 306MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  75% 3.21G/4.29G [00:11<00:03, 289MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  75% 3.24G/4.29G [00:12<00:03, 281MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  76% 3.27G/4.29G [00:12<00:03, 285MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  77% 3.31G/4.29G [00:12<00:03, 293MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  78% 3.36G/4.29G [00:12<00:02, 315MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  79% 3.41G/4.29G [00:12<00:02, 366MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  81% 3.46G/4.29G [00:12<00:02, 403MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  82% 3.50G/4.29G [00:15<00:18, 42.7MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  83% 3.57G/4.29G [00:15<00:11, 65.4MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  85% 3.63G/4.29G [00:16<00:07, 94.9MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  86% 3.69G/4.29G [00:16<00:04, 132MB/s] \u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  87% 3.75G/4.29G [00:16<00:03, 177MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  89% 3.82G/4.29G [00:16<00:02, 228MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  90% 3.88G/4.29G [00:16<00:01, 284MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  92% 3.94G/4.29G [00:16<00:01, 340MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  93% 4.01G/4.29G [00:16<00:00, 394MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  95% 4.07G/4.29G [00:16<00:00, 438MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  96% 4.13G/4.29G [00:16<00:00, 474MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors:  98% 4.19G/4.29G [00:17<00:00, 503MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00001-of-00002.safetensors: 100% 4.29G/4.29G [00:17<00:00, 250MB/s]\n",
            "Fetching 19 files: 100% 19/19 [00:17<00:00,  1.07it/s]\n",
            "model downloaded to /content/dots.ocr/weights/DotsOCR\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "from pathlib import Path\n",
        "\n",
        "# Set up model path (using a directory in Colab's temporary storage)\n",
        "hf_model_path = \"./weights/DotsOCR\"\n",
        "os.environ[\"hf_model_path\"] = hf_model_path\n",
        "\n",
        "# Create directory if it doesn't exist\n",
        "Path(hf_model_path).mkdir(parents=True, exist_ok=True)\n",
        "\n",
        "# Add to PYTHONPATH\n",
        "os.environ[\"PYTHONPATH\"] = f\"{os.path.dirname(hf_model_path)}:{os.environ.get('PYTHONPATH', '')}\"\n",
        "\n",
        "# Install required packages\n",
        "!pip install vllm==0.10.0 transformers\n",
        "\n",
        "# Modify vllm import (this is a workaround - may need adjustment based on vllm version)\n",
        "try:\n",
        "    vllm_path = !which vllm\n",
        "    if vllm_path:\n",
        "        vllm_path = vllm_path[0]\n",
        "        !sed -i '/^from vllm\\.entrypoints\\.cli\\.main import main$/a from DotsOCR import modeling_dots_ocr_vllm' {vllm_path}\n",
        "except:\n",
        "    print(\"Could not automatically modify vllm imports. You may need to do this manually.\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "m1eyfkYlTGTs",
        "outputId": "04548a02-fc8c-4891-8b95-0fad33f0f20e"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting vllm\n",
            "  Downloading vllm-0.10.0-cp38-abi3-manylinux1_x86_64.whl.metadata (14 kB)\n",
            "Requirement already satisfied: transformers in /usr/local/lib/python3.11/dist-packages (4.51.3)\n",
            "Requirement already satisfied: regex in /usr/local/lib/python3.11/dist-packages (from vllm) (2024.11.6)\n",
            "Requirement already satisfied: cachetools in /usr/local/lib/python3.11/dist-packages (from vllm) (5.5.2)\n",
            "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from vllm) (5.9.5)\n",
            "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.11/dist-packages (from vllm) (0.2.0)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from vllm) (2.0.2)\n",
            "Requirement already satisfied: requests>=2.26.0 in /usr/local/lib/python3.11/dist-packages (from vllm) (2.32.3)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from vllm) (4.67.1)\n",
            "Collecting blake3 (from vllm)\n",
            "  Downloading blake3-1.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)\n",
            "Requirement already satisfied: py-cpuinfo in /usr/local/lib/python3.11/dist-packages (from vllm) (9.0.0)\n",
            "Collecting transformers\n",
            "  Downloading transformers-4.55.0-py3-none-any.whl.metadata (39 kB)\n",
            "Requirement already satisfied: huggingface-hub>=0.33.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub[hf_xet]>=0.33.0->vllm) (0.34.3)\n",
            "Requirement already satisfied: tokenizers>=0.21.1 in /usr/local/lib/python3.11/dist-packages (from vllm) (0.21.4)\n",
            "Requirement already satisfied: protobuf in /usr/local/lib/python3.11/dist-packages (from vllm) (5.29.5)\n",
            "Requirement already satisfied: fastapi>=0.115.0 in /usr/local/lib/python3.11/dist-packages (from fastapi[standard]>=0.115.0->vllm) (0.116.1)\n",
            "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from vllm) (3.12.15)\n",
            "Collecting openai<=1.90.0,>=1.87.0 (from vllm)\n",
            "  Downloading openai-1.90.0-py3-none-any.whl.metadata (26 kB)\n",
            "Requirement already satisfied: pydantic>=2.10 in /usr/local/lib/python3.11/dist-packages (from vllm) (2.11.7)\n",
            "Requirement already satisfied: prometheus_client>=0.18.0 in /usr/local/lib/python3.11/dist-packages (from vllm) (0.22.1)\n",
            "Requirement already satisfied: pillow in /usr/local/lib/python3.11/dist-packages (from vllm) (11.3.0)\n",
            "Collecting prometheus-fastapi-instrumentator>=7.0.0 (from vllm)\n",
            "  Downloading prometheus_fastapi_instrumentator-7.1.0-py3-none-any.whl.metadata (13 kB)\n",
            "Requirement already satisfied: tiktoken>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from vllm) (0.9.0)\n",
            "Collecting lm-format-enforcer<0.11,>=0.10.11 (from vllm)\n",
            "  Downloading lm_format_enforcer-0.10.12-py3-none-any.whl.metadata (17 kB)\n",
            "Collecting llguidance<0.8.0,>=0.7.11 (from vllm)\n",
            "  Downloading llguidance-0.7.30-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)\n",
            "Collecting outlines_core==0.2.10 (from vllm)\n",
            "  Downloading outlines_core-0.2.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.8 kB)\n",
            "Collecting diskcache==5.6.3 (from vllm)\n",
            "  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)\n",
            "Collecting lark==1.2.2 (from vllm)\n",
            "  Downloading lark-1.2.2-py3-none-any.whl.metadata (1.8 kB)\n",
            "Collecting xgrammar==0.1.21 (from vllm)\n",
            "  Downloading xgrammar-0.1.21-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.3 kB)\n",
            "Requirement already satisfied: typing_extensions>=4.10 in /usr/local/lib/python3.11/dist-packages (from vllm) (4.14.1)\n",
            "Requirement already satisfied: filelock>=3.16.1 in /usr/local/lib/python3.11/dist-packages (from vllm) (3.18.0)\n",
            "Collecting partial-json-parser (from vllm)\n",
            "  Downloading partial_json_parser-0.2.1.1.post6-py3-none-any.whl.metadata (6.1 kB)\n",
            "Requirement already satisfied: pyzmq>=25.0.0 in /usr/local/lib/python3.11/dist-packages (from vllm) (26.2.1)\n",
            "Collecting msgspec (from vllm)\n",
            "  Downloading msgspec-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)\n",
            "Collecting gguf>=0.13.0 (from vllm)\n",
            "  Downloading gguf-0.17.1-py3-none-any.whl.metadata (4.3 kB)\n",
            "Collecting mistral_common>=1.8.2 (from mistral_common[audio,image]>=1.8.2->vllm)\n",
            "  Downloading mistral_common-1.8.3-py3-none-any.whl.metadata (3.8 kB)\n",
            "Requirement already satisfied: opencv-python-headless>=4.11.0 in /usr/local/lib/python3.11/dist-packages (from vllm) (4.12.0.88)\n",
            "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from vllm) (6.0.2)\n",
            "Requirement already satisfied: einops in /usr/local/lib/python3.11/dist-packages (from vllm) (0.8.1)\n",
            "Collecting compressed-tensors==0.10.2 (from vllm)\n",
            "  Downloading compressed_tensors-0.10.2-py3-none-any.whl.metadata (7.0 kB)\n",
            "Collecting depyf==0.19.0 (from vllm)\n",
            "  Downloading depyf-0.19.0-py3-none-any.whl.metadata (7.3 kB)\n",
            "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.11/dist-packages (from vllm) (3.1.1)\n",
            "Collecting watchfiles (from vllm)\n",
            "  Downloading watchfiles-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n",
            "Collecting python-json-logger (from vllm)\n",
            "  Downloading python_json_logger-3.3.0-py3-none-any.whl.metadata (4.0 kB)\n",
            "Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (from vllm) (1.16.1)\n",
            "Collecting ninja (from vllm)\n",
            "  Using cached ninja-1.11.1.4-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.0 kB)\n",
            "Collecting pybase64 (from vllm)\n",
            "  Downloading pybase64-1.4.2-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (8.7 kB)\n",
            "Collecting cbor2 (from vllm)\n",
            "  Downloading cbor2-5.6.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.0 kB)\n",
            "Collecting numba==0.61.2 (from vllm)\n",
            "  Downloading numba-0.61.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.8 kB)\n",
            "Collecting ray!=2.44.*,>=2.43.0 (from ray[cgraph]!=2.44.*,>=2.43.0->vllm)\n",
            "  Downloading ray-2.48.0-cp311-cp311-manylinux2014_x86_64.whl.metadata (19 kB)\n",
            "Collecting torch==2.7.1 (from vllm)\n",
            "  Downloading torch-2.7.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (29 kB)\n",
            "Collecting torchaudio==2.7.1 (from vllm)\n",
            "  Downloading torchaudio-2.7.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (6.6 kB)\n",
            "Collecting torchvision==0.22.1 (from vllm)\n",
            "  Downloading torchvision-0.22.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (6.1 kB)\n",
            "Collecting xformers==0.0.31 (from vllm)\n",
            "  Downloading xformers-0.0.31-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (1.0 kB)\n",
            "Collecting astor (from depyf==0.19.0->vllm)\n",
            "  Downloading astor-0.8.1-py2.py3-none-any.whl.metadata (4.2 kB)\n",
            "Requirement already satisfied: dill in /usr/local/lib/python3.11/dist-packages (from depyf==0.19.0->vllm) (0.3.8)\n",
            "Collecting llvmlite<0.45,>=0.44.0dev0 (from numba==0.61.2->vllm)\n",
            "  Downloading llvmlite-0.44.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.8 kB)\n",
            "Requirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.1->vllm) (1.13.3)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch==2.7.1->vllm) (3.5)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.1->vllm) (3.1.6)\n",
            "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch==2.7.1->vllm) (2025.3.0)\n",
            "Collecting nvidia-cuda-nvrtc-cu12==12.6.77 (from torch==2.7.1->vllm)\n",
            "  Downloading nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
            "Collecting nvidia-cuda-runtime-cu12==12.6.77 (from torch==2.7.1->vllm)\n",
            "  Downloading nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.5 kB)\n",
            "Collecting nvidia-cuda-cupti-cu12==12.6.80 (from torch==2.7.1->vllm)\n",
            "  Downloading nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.6 kB)\n",
            "Collecting nvidia-cudnn-cu12==9.5.1.17 (from torch==2.7.1->vllm)\n",
            "  Downloading nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_x86_64.whl.metadata (1.6 kB)\n",
            "Collecting nvidia-cublas-cu12==12.6.4.1 (from torch==2.7.1->vllm)\n",
            "  Downloading nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.5 kB)\n",
            "Collecting nvidia-cufft-cu12==11.3.0.4 (from torch==2.7.1->vllm)\n",
            "  Downloading nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.5 kB)\n",
            "Collecting nvidia-curand-cu12==10.3.7.77 (from torch==2.7.1->vllm)\n",
            "  Downloading nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.5 kB)\n",
            "Collecting nvidia-cusolver-cu12==11.7.1.2 (from torch==2.7.1->vllm)\n",
            "  Downloading nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.6 kB)\n",
            "Collecting nvidia-cusparse-cu12==12.5.4.2 (from torch==2.7.1->vllm)\n",
            "  Downloading nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.6 kB)\n",
            "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.3 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.1->vllm) (0.6.3)\n",
            "Requirement already satisfied: nvidia-nccl-cu12==2.26.2 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.1->vllm) (2.26.2)\n",
            "Collecting nvidia-nvtx-cu12==12.6.77 (from torch==2.7.1->vllm)\n",
            "  Downloading nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.6 kB)\n",
            "Collecting nvidia-nvjitlink-cu12==12.6.85 (from torch==2.7.1->vllm)\n",
            "  Downloading nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl.metadata (1.5 kB)\n",
            "Collecting nvidia-cufile-cu12==1.11.1.6 (from torch==2.7.1->vllm)\n",
            "  Downloading nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.5 kB)\n",
            "Collecting triton==3.3.1 (from torch==2.7.1->vllm)\n",
            "  Downloading triton-3.3.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (1.5 kB)\n",
            "Requirement already satisfied: setuptools>=40.8.0 in /usr/local/lib/python3.11/dist-packages (from triton==3.3.1->torch==2.7.1->vllm) (75.2.0)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (25.0)\n",
            "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.5.3)\n",
            "Requirement already satisfied: starlette<0.48.0,>=0.40.0 in /usr/local/lib/python3.11/dist-packages (from fastapi>=0.115.0->fastapi[standard]>=0.115.0->vllm) (0.47.2)\n",
            "Collecting fastapi-cli>=0.0.8 (from fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm)\n",
            "  Downloading fastapi_cli-0.0.8-py3-none-any.whl.metadata (6.3 kB)\n",
            "Requirement already satisfied: httpx>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from fastapi[standard]>=0.115.0->vllm) (0.28.1)\n",
            "Requirement already satisfied: python-multipart>=0.0.18 in /usr/local/lib/python3.11/dist-packages (from fastapi[standard]>=0.115.0->vllm) (0.0.20)\n",
            "Collecting email-validator>=2.0.0 (from fastapi[standard]>=0.115.0->vllm)\n",
            "  Downloading email_validator-2.2.0-py3-none-any.whl.metadata (25 kB)\n",
            "Requirement already satisfied: uvicorn>=0.12.0 in /usr/local/lib/python3.11/dist-packages (from uvicorn[standard]>=0.12.0; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm) (0.35.0)\n",
            "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.33.0->huggingface-hub[hf_xet]>=0.33.0->vllm) (1.1.5)\n",
            "Collecting interegular>=0.3.2 (from lm-format-enforcer<0.11,>=0.10.11->vllm)\n",
            "  Downloading interegular-0.3.3-py37-none-any.whl.metadata (3.0 kB)\n",
            "Requirement already satisfied: jsonschema>=4.21.1 in /usr/local/lib/python3.11/dist-packages (from mistral_common>=1.8.2->mistral_common[audio,image]>=1.8.2->vllm) (4.25.0)\n",
            "Collecting pydantic-extra-types>=2.10.5 (from pydantic-extra-types[pycountry]>=2.10.5->mistral_common>=1.8.2->mistral_common[audio,image]>=1.8.2->vllm)\n",
            "  Downloading pydantic_extra_types-2.10.5-py3-none-any.whl.metadata (3.9 kB)\n",
            "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.11/dist-packages (from openai<=1.90.0,>=1.87.0->vllm) (4.10.0)\n",
            "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.11/dist-packages (from openai<=1.90.0,>=1.87.0->vllm) (1.9.0)\n",
            "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from openai<=1.90.0,>=1.87.0->vllm) (0.10.0)\n",
            "Requirement already satisfied: sniffio in /usr/local/lib/python3.11/dist-packages (from openai<=1.90.0,>=1.87.0->vllm) (1.3.1)\n",
            "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2.10->vllm) (0.7.0)\n",
            "Requirement already satisfied: pydantic-core==2.33.2 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2.10->vllm) (2.33.2)\n",
            "Requirement already satisfied: typing-inspection>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2.10->vllm) (0.4.1)\n",
            "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.11/dist-packages (from ray!=2.44.*,>=2.43.0->ray[cgraph]!=2.44.*,>=2.43.0->vllm) (8.2.1)\n",
            "Requirement already satisfied: msgpack<2.0.0,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from ray!=2.44.*,>=2.43.0->ray[cgraph]!=2.44.*,>=2.43.0->vllm) (1.1.1)\n",
            "Requirement already satisfied: cupy-cuda12x in /usr/local/lib/python3.11/dist-packages (from ray[cgraph]!=2.44.*,>=2.43.0->vllm) (13.3.0)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.26.0->vllm) (3.4.2)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests>=2.26.0->vllm) (3.10)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests>=2.26.0->vllm) (2.5.0)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests>=2.26.0->vllm) (2025.8.3)\n",
            "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->vllm) (2.6.1)\n",
            "Requirement already satisfied: aiosignal>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->vllm) (1.4.0)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->vllm) (25.3.0)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->vllm) (1.7.0)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->vllm) (6.6.3)\n",
            "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->vllm) (0.3.2)\n",
            "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->vllm) (1.20.1)\n",
            "Collecting dnspython>=2.0.0 (from email-validator>=2.0.0->fastapi[standard]>=0.115.0->vllm)\n",
            "  Downloading dnspython-2.7.0-py3-none-any.whl.metadata (5.8 kB)\n",
            "Requirement already satisfied: typer>=0.15.1 in /usr/local/lib/python3.11/dist-packages (from fastapi-cli>=0.0.8->fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm) (0.16.0)\n",
            "Collecting rich-toolkit>=0.14.8 (from fastapi-cli>=0.0.8->fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm)\n",
            "  Downloading rich_toolkit-0.14.9-py3-none-any.whl.metadata (999 bytes)\n",
            "Collecting fastapi-cloud-cli>=0.1.1 (from fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm)\n",
            "  Downloading fastapi_cloud_cli-0.1.5-py3-none-any.whl.metadata (3.2 kB)\n",
            "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx>=0.23.0->fastapi[standard]>=0.115.0->vllm) (1.0.9)\n",
            "Requirement already satisfied: h11>=0.16 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx>=0.23.0->fastapi[standard]>=0.115.0->vllm) (0.16.0)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch==2.7.1->vllm) (3.0.2)\n",
            "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=4.21.1->mistral_common>=1.8.2->mistral_common[audio,image]>=1.8.2->vllm) (2025.4.1)\n",
            "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=4.21.1->mistral_common>=1.8.2->mistral_common[audio,image]>=1.8.2->vllm) (0.36.2)\n",
            "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=4.21.1->mistral_common>=1.8.2->mistral_common[audio,image]>=1.8.2->vllm) (0.26.0)\n",
            "Collecting pycountry>=23 (from pydantic-extra-types[pycountry]>=2.10.5->mistral_common>=1.8.2->mistral_common[audio,image]>=1.8.2->vllm)\n",
            "  Downloading pycountry-24.6.1-py3-none-any.whl.metadata (12 kB)\n",
            "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy>=1.13.3->torch==2.7.1->vllm) (1.3.0)\n",
            "Collecting httptools>=0.6.3 (from uvicorn[standard]>=0.12.0; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm)\n",
            "  Downloading httptools-0.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)\n",
            "Collecting python-dotenv>=0.13 (from uvicorn[standard]>=0.12.0; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm)\n",
            "  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)\n",
            "Collecting uvloop>=0.15.1 (from uvicorn[standard]>=0.12.0; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm)\n",
            "  Downloading uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n",
            "Requirement already satisfied: websockets>=10.4 in /usr/local/lib/python3.11/dist-packages (from uvicorn[standard]>=0.12.0; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm) (15.0.1)\n",
            "Requirement already satisfied: fastrlock>=0.5 in /usr/local/lib/python3.11/dist-packages (from cupy-cuda12x->ray[cgraph]!=2.44.*,>=2.43.0->vllm) (0.8.3)\n",
            "Requirement already satisfied: soundfile>=0.12.1 in /usr/local/lib/python3.11/dist-packages (from mistral_common>=1.8.2->mistral_common[audio,image]>=1.8.2->vllm) (0.13.1)\n",
            "Requirement already satisfied: soxr>=0.5.0 in /usr/local/lib/python3.11/dist-packages (from mistral_common>=1.8.2->mistral_common[audio,image]>=1.8.2->vllm) (0.5.0.post1)\n",
            "Collecting rignore>=0.5.1 (from fastapi-cloud-cli>=0.1.1->fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm)\n",
            "  Downloading rignore-0.6.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n",
            "Requirement already satisfied: sentry-sdk>=2.20.0 in /usr/local/lib/python3.11/dist-packages (from fastapi-cloud-cli>=0.1.1->fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm) (2.34.1)\n",
            "Requirement already satisfied: rich>=13.7.1 in /usr/local/lib/python3.11/dist-packages (from rich-toolkit>=0.14.8->fastapi-cli>=0.0.8->fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm) (13.9.4)\n",
            "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.11/dist-packages (from soundfile>=0.12.1->mistral_common>=1.8.2->mistral_common[audio,image]>=1.8.2->vllm) (1.17.1)\n",
            "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.11/dist-packages (from typer>=0.15.1->fastapi-cli>=0.0.8->fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm) (1.5.4)\n",
            "Requirement already satisfied: pycparser in /usr/local/lib/python3.11/dist-packages (from cffi>=1.0->soundfile>=0.12.1->mistral_common>=1.8.2->mistral_common[audio,image]>=1.8.2->vllm) (2.22)\n",
            "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich>=13.7.1->rich-toolkit>=0.14.8->fastapi-cli>=0.0.8->fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm) (3.0.0)\n",
            "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich>=13.7.1->rich-toolkit>=0.14.8->fastapi-cli>=0.0.8->fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm) (2.19.2)\n",
            "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich>=13.7.1->rich-toolkit>=0.14.8->fastapi-cli>=0.0.8->fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm) (0.1.2)\n",
            "Downloading vllm-0.10.0-cp38-abi3-manylinux1_x86_64.whl (386.6 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m386.6/386.6 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading compressed_tensors-0.10.2-py3-none-any.whl (169 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m169.0/169.0 kB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading depyf-0.19.0-py3-none-any.whl (39 kB)\n",
            "Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading lark-1.2.2-py3-none-any.whl (111 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.0/111.0 kB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading numba-0.61.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (3.8 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m105.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading outlines_core-0.2.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m89.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading torch-2.7.1-cp311-cp311-manylinux_2_28_x86_64.whl (821.2 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m821.2/821.2 MB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading torchaudio-2.7.1-cp311-cp311-manylinux_2_28_x86_64.whl (3.5 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.5/3.5 MB\u001b[0m \u001b[31m111.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading torchvision-0.22.1-cp311-cp311-manylinux_2_28_x86_64.whl (7.5 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.5/7.5 MB\u001b[0m \u001b[31m133.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading xformers-0.0.31-cp39-abi3-manylinux_2_28_x86_64.whl (117.1 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.1/117.1 MB\u001b[0m \u001b[31m20.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading xgrammar-0.1.21-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.8 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.8/11.8 MB\u001b[0m \u001b[31m128.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (393.1 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m393.1/393.1 MB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (8.9 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.9/8.9 MB\u001b[0m \u001b[31m134.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl (23.7 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m23.7/23.7 MB\u001b[0m \u001b[31m99.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (897 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m897.7/897.7 kB\u001b[0m \u001b[31m52.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_x86_64.whl (571.0 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m571.0/571.0 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (200.2 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m200.2/200.2 MB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (1.1 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m65.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (56.3 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m43.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (158.2 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m158.2/158.2 MB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (216.6 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m216.6/216.6 MB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl (19.7 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.7/19.7 MB\u001b[0m \u001b[31m109.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (89 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m89.3/89.3 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading triton-3.3.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (155.7 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m155.7/155.7 MB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading transformers-4.55.0-py3-none-any.whl (11.3 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.3/11.3 MB\u001b[0m \u001b[31m141.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading gguf-0.17.1-py3-none-any.whl (96 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m96.2/96.2 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading llguidance-0.7.30-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.0 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.0/15.0 MB\u001b[0m \u001b[31m129.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading lm_format_enforcer-0.10.12-py3-none-any.whl (44 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading mistral_common-1.8.3-py3-none-any.whl (6.5 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.5/6.5 MB\u001b[0m \u001b[31m133.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading openai-1.90.0-py3-none-any.whl (734 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m734.6/734.6 kB\u001b[0m \u001b[31m53.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading prometheus_fastapi_instrumentator-7.1.0-py3-none-any.whl (19 kB)\n",
            "Downloading ray-2.48.0-cp311-cp311-manylinux2014_x86_64.whl (70.1 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m70.1/70.1 MB\u001b[0m \u001b[31m37.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading blake3-1.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (385 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m385.5/385.5 kB\u001b[0m \u001b[31m35.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading cbor2-5.6.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (249 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m249.2/249.2 kB\u001b[0m \u001b[31m25.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading msgspec-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (210 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m210.7/210.7 kB\u001b[0m \u001b[31m22.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hUsing cached ninja-1.11.1.4-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (422 kB)\n",
            "Downloading partial_json_parser-0.2.1.1.post6-py3-none-any.whl (10 kB)\n",
            "Downloading pybase64-1.4.2-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl (71 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.4/71.4 kB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading python_json_logger-3.3.0-py3-none-any.whl (15 kB)\n",
            "Downloading watchfiles-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (453 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m453.1/453.1 kB\u001b[0m \u001b[31m41.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading email_validator-2.2.0-py3-none-any.whl (33 kB)\n",
            "Downloading fastapi_cli-0.0.8-py3-none-any.whl (10 kB)\n",
            "Downloading interegular-0.3.3-py37-none-any.whl (23 kB)\n",
            "Downloading llvmlite-0.44.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (42.4 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.4/42.4 MB\u001b[0m \u001b[31m61.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading pydantic_extra_types-2.10.5-py3-none-any.whl (38 kB)\n",
            "Downloading astor-0.8.1-py2.py3-none-any.whl (27 kB)\n",
            "Downloading dnspython-2.7.0-py3-none-any.whl (313 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m313.6/313.6 kB\u001b[0m \u001b[31m29.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading fastapi_cloud_cli-0.1.5-py3-none-any.whl (18 kB)\n",
            "Downloading httptools-0.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (459 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m459.8/459.8 kB\u001b[0m \u001b[31m36.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading pycountry-24.6.1-py3-none-any.whl (6.3 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.3/6.3 MB\u001b[0m \u001b[31m127.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading python_dotenv-1.1.1-py3-none-any.whl (20 kB)\n",
            "Downloading rich_toolkit-0.14.9-py3-none-any.whl (25 kB)\n",
            "Downloading uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.0 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.0/4.0 MB\u001b[0m \u001b[31m114.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading rignore-0.6.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (950 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m950.6/950.6 kB\u001b[0m \u001b[31m60.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hInstalling collected packages: blake3, uvloop, triton, rignore, python-json-logger, python-dotenv, pycountry, pybase64, partial-json-parser, outlines_core, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufile-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, ninja, msgspec, llvmlite, llguidance, lark, interegular, httptools, gguf, dnspython, diskcache, cbor2, astor, watchfiles, nvidia-cusparse-cu12, nvidia-cufft-cu12, nvidia-cudnn-cu12, numba, email-validator, depyf, rich-toolkit, pydantic-extra-types, prometheus-fastapi-instrumentator, openai, nvidia-cusolver-cu12, lm-format-enforcer, transformers, torch, ray, fastapi-cloud-cli, fastapi-cli, xgrammar, xformers, torchvision, torchaudio, mistral_common, compressed-tensors, vllm\n",
            "  Attempting uninstall: triton\n",
            "    Found existing installation: triton 3.3.0\n",
            "    Uninstalling triton-3.3.0:\n",
            "      Successfully uninstalled triton-3.3.0\n",
            "  Attempting uninstall: nvidia-nvtx-cu12\n",
            "    Found existing installation: nvidia-nvtx-cu12 12.8.55\n",
            "    Uninstalling nvidia-nvtx-cu12-12.8.55:\n",
            "      Successfully uninstalled nvidia-nvtx-cu12-12.8.55\n",
            "  Attempting uninstall: nvidia-nvjitlink-cu12\n",
            "    Found existing installation: nvidia-nvjitlink-cu12 12.8.61\n",
            "    Uninstalling nvidia-nvjitlink-cu12-12.8.61:\n",
            "      Successfully uninstalled nvidia-nvjitlink-cu12-12.8.61\n",
            "  Attempting uninstall: nvidia-curand-cu12\n",
            "    Found existing installation: nvidia-curand-cu12 10.3.9.55\n",
            "    Uninstalling nvidia-curand-cu12-10.3.9.55:\n",
            "      Successfully uninstalled nvidia-curand-cu12-10.3.9.55\n",
            "  Attempting uninstall: nvidia-cufile-cu12\n",
            "    Found existing installation: nvidia-cufile-cu12 1.13.0.11\n",
            "    Uninstalling nvidia-cufile-cu12-1.13.0.11:\n",
            "      Successfully uninstalled nvidia-cufile-cu12-1.13.0.11\n",
            "  Attempting uninstall: nvidia-cuda-runtime-cu12\n",
            "    Found existing installation: nvidia-cuda-runtime-cu12 12.8.57\n",
            "    Uninstalling nvidia-cuda-runtime-cu12-12.8.57:\n",
            "      Successfully uninstalled nvidia-cuda-runtime-cu12-12.8.57\n",
            "  Attempting uninstall: nvidia-cuda-nvrtc-cu12\n",
            "    Found existing installation: nvidia-cuda-nvrtc-cu12 12.8.61\n",
            "    Uninstalling nvidia-cuda-nvrtc-cu12-12.8.61:\n",
            "      Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.8.61\n",
            "  Attempting uninstall: nvidia-cuda-cupti-cu12\n",
            "    Found existing installation: nvidia-cuda-cupti-cu12 12.8.57\n",
            "    Uninstalling nvidia-cuda-cupti-cu12-12.8.57:\n",
            "      Successfully uninstalled nvidia-cuda-cupti-cu12-12.8.57\n",
            "  Attempting uninstall: nvidia-cublas-cu12\n",
            "    Found existing installation: nvidia-cublas-cu12 12.8.3.14\n",
            "    Uninstalling nvidia-cublas-cu12-12.8.3.14:\n",
            "      Successfully uninstalled nvidia-cublas-cu12-12.8.3.14\n",
            "  Attempting uninstall: llvmlite\n",
            "    Found existing installation: llvmlite 0.43.0\n",
            "    Uninstalling llvmlite-0.43.0:\n",
            "      Successfully uninstalled llvmlite-0.43.0\n",
            "  Attempting uninstall: nvidia-cusparse-cu12\n",
            "    Found existing installation: nvidia-cusparse-cu12 12.5.7.53\n",
            "    Uninstalling nvidia-cusparse-cu12-12.5.7.53:\n",
            "      Successfully uninstalled nvidia-cusparse-cu12-12.5.7.53\n",
            "  Attempting uninstall: nvidia-cufft-cu12\n",
            "    Found existing installation: nvidia-cufft-cu12 11.3.3.41\n",
            "    Uninstalling nvidia-cufft-cu12-11.3.3.41:\n",
            "      Successfully uninstalled nvidia-cufft-cu12-11.3.3.41\n",
            "  Attempting uninstall: nvidia-cudnn-cu12\n",
            "    Found existing installation: nvidia-cudnn-cu12 9.7.1.26\n",
            "    Uninstalling nvidia-cudnn-cu12-9.7.1.26:\n",
            "      Successfully uninstalled nvidia-cudnn-cu12-9.7.1.26\n",
            "  Attempting uninstall: numba\n",
            "    Found existing installation: numba 0.60.0\n",
            "    Uninstalling numba-0.60.0:\n",
            "      Successfully uninstalled numba-0.60.0\n",
            "  Attempting uninstall: openai\n",
            "    Found existing installation: openai 1.98.0\n",
            "    Uninstalling openai-1.98.0:\n",
            "      Successfully uninstalled openai-1.98.0\n",
            "  Attempting uninstall: nvidia-cusolver-cu12\n",
            "    Found existing installation: nvidia-cusolver-cu12 11.7.2.55\n",
            "    Uninstalling nvidia-cusolver-cu12-11.7.2.55:\n",
            "      Successfully uninstalled nvidia-cusolver-cu12-11.7.2.55\n",
            "  Attempting uninstall: transformers\n",
            "    Found existing installation: transformers 4.51.3\n",
            "    Uninstalling transformers-4.51.3:\n",
            "      Successfully uninstalled transformers-4.51.3\n",
            "  Attempting uninstall: torch\n",
            "    Found existing installation: torch 2.7.0+cu128\n",
            "    Uninstalling torch-2.7.0+cu128:\n",
            "      Successfully uninstalled torch-2.7.0+cu128\n",
            "  Attempting uninstall: torchvision\n",
            "    Found existing installation: torchvision 0.22.0+cu128\n",
            "    Uninstalling torchvision-0.22.0+cu128:\n",
            "      Successfully uninstalled torchvision-0.22.0+cu128\n",
            "  Attempting uninstall: torchaudio\n",
            "    Found existing installation: torchaudio 2.7.0+cu128\n",
            "    Uninstalling torchaudio-2.7.0+cu128:\n",
            "      Successfully uninstalled torchaudio-2.7.0+cu128\n",
            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
            "fastai 2.7.19 requires torch<2.7,>=1.10, but you have torch 2.7.1 which is incompatible.\n",
            "dots-ocr 1.0 requires transformers==4.51.3, but you have transformers 4.55.0 which is incompatible.\u001b[0m\u001b[31m\n",
            "\u001b[0mSuccessfully installed astor-0.8.1 blake3-1.0.5 cbor2-5.6.5 compressed-tensors-0.10.2 depyf-0.19.0 diskcache-5.6.3 dnspython-2.7.0 email-validator-2.2.0 fastapi-cli-0.0.8 fastapi-cloud-cli-0.1.5 gguf-0.17.1 httptools-0.6.4 interegular-0.3.3 lark-1.2.2 llguidance-0.7.30 llvmlite-0.44.0 lm-format-enforcer-0.10.12 mistral_common-1.8.3 msgspec-0.19.0 ninja-1.11.1.4 numba-0.61.2 nvidia-cublas-cu12-12.6.4.1 nvidia-cuda-cupti-cu12-12.6.80 nvidia-cuda-nvrtc-cu12-12.6.77 nvidia-cuda-runtime-cu12-12.6.77 nvidia-cudnn-cu12-9.5.1.17 nvidia-cufft-cu12-11.3.0.4 nvidia-cufile-cu12-1.11.1.6 nvidia-curand-cu12-10.3.7.77 nvidia-cusolver-cu12-11.7.1.2 nvidia-cusparse-cu12-12.5.4.2 nvidia-nvjitlink-cu12-12.6.85 nvidia-nvtx-cu12-12.6.77 openai-1.90.0 outlines_core-0.2.10 partial-json-parser-0.2.1.1.post6 prometheus-fastapi-instrumentator-7.1.0 pybase64-1.4.2 pycountry-24.6.1 pydantic-extra-types-2.10.5 python-dotenv-1.1.1 python-json-logger-3.3.0 ray-2.48.0 rich-toolkit-0.14.9 rignore-0.6.4 torch-2.7.1 torchaudio-2.7.1 torchvision-0.22.1 transformers-4.55.0 triton-3.3.1 uvloop-0.21.0 vllm-0.10.0 watchfiles-1.1.0 xformers-0.0.31 xgrammar-0.1.21\n",
            "nohup: failed to run command 'CUDA_VISIBLE_DEVICES=0': No such file or directory\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from pyngrok import ngrok\n",
        "public_url = ngrok.connect(8000, bind_tls=True)  # Adjust port if needed\n",
        "print(\"Public URL:\", public_url)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "iNPRVOjmUxJb",
        "outputId": "66388365-796e-4489-9285-17ad6ccad0ed"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Public URL: NgrokTunnel: \"https://988ecbb0776c.ngrok-free.app\" -> \"http://localhost:8000\"\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!CUDA_VISIBLE_DEVICES=0 vllm serve ./weights/DotsOCR --tensor-parallel-size 1 --gpu-memory-utilization 0.95  --chat-template-content-format string --served-model-name model --trust-remote-code"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "QbYEd_foT2QY",
        "outputId": "6c980927-042e-498a-e013-a575d4cf5132"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "2025-08-07 20:57:52.107021: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
            "2025-08-07 20:57:52.125111: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
            "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
            "E0000 00:00:1754600272.146783   10516 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
            "E0000 00:00:1754600272.153513   10516 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
            "W0000 00:00:1754600272.170115   10516 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
            "W0000 00:00:1754600272.170145   10516 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
            "W0000 00:00:1754600272.170148   10516 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
            "W0000 00:00:1754600272.170151   10516 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
            "2025-08-07 20:57:52.174913: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
            "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
            "INFO 08-07 20:57:57 [__init__.py:235] Automatically detected platform cuda.\n",
            "INFO 08-07 20:58:01 [api_server.py:1755] vLLM API server version 0.10.0\n",
            "INFO 08-07 20:58:01 [cli_args.py:261] non-default args: {'model_tag': './weights/DotsOCR', 'chat_template_content_format': 'string', 'model': './weights/DotsOCR', 'trust_remote_code': True, 'served_model_name': ['model'], 'gpu_memory_utilization': 0.95}\n",
            "INFO 08-07 20:58:01 [config.py:1604] Using max model len 131072\n",
            "INFO 08-07 20:58:01 [config.py:2434] Chunked prefill is enabled with max_num_batched_tokens=2048.\n",
            "2025-08-07 20:58:05.950037: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
            "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
            "E0000 00:00:1754600285.970806   10621 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
            "E0000 00:00:1754600285.977110   10621 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
            "W0000 00:00:1754600285.992571   10621 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
            "W0000 00:00:1754600285.992601   10621 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
            "W0000 00:00:1754600285.992604   10621 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
            "W0000 00:00:1754600285.992606   10621 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
            "INFO 08-07 20:58:11 [__init__.py:235] Automatically detected platform cuda.\n",
            "INFO 08-07 20:58:14 [core.py:572] Waiting for init message from front-end.\n",
            "INFO 08-07 20:58:14 [core.py:71] Initializing a V1 LLM engine (v0.10.0) with config: model='./weights/DotsOCR', speculative_config=None, tokenizer='./weights/DotsOCR', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), seed=0, served_model_name=model, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, pooler_config=None, compilation_config={\"level\":3,\"debug_dump_path\":\"\",\"cache_dir\":\"\",\"backend\":\"\",\"custom_ops\":[],\"splitting_ops\":[\"vllm.unified_attention\",\"vllm.unified_attention_with_output\",\"vllm.mamba_mixer2\"],\"use_inductor\":true,\"compile_sizes\":[],\"inductor_compile_config\":{\"enable_auto_functionalized_v2\":false},\"inductor_passes\":{},\"use_cudagraph\":true,\"cudagraph_num_of_warmups\":1,\"cudagraph_capture_sizes\":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],\"cudagraph_copy_inputs\":false,\"full_cuda_graph\":false,\"max_capture_size\":512,\"local_cache_dir\":null}\n",
            "INFO 08-07 20:58:15 [parallel_state.py:1102] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0\n",
            "WARNING 08-07 20:58:15 [topk_topp_sampler.py:59] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.\n",
            "INFO 08-07 20:58:15 [gpu_model_runner.py:1843] Starting to load model ./weights/DotsOCR...\n",
            "INFO 08-07 20:58:15 [gpu_model_runner.py:1875] Loading model from scratch...\n",
            "INFO 08-07 20:58:16 [cuda.py:290] Using Flash Attention backend on V1 engine.\n",
            "Loading safetensors checkpoint shards: 100% 2/2 [00:01<00:00,  1.06it/s]\n",
            "INFO 08-07 20:58:18 [default_loader.py:262] Loading weights took 1.99 seconds\n",
            "INFO 08-07 20:58:19 [gpu_model_runner.py:1892] Model loading took 5.7174 GiB and 2.253556 seconds\n",
            "INFO 08-07 20:58:19 [gpu_model_runner.py:2380] Encoder cache will be initialized with a budget of 14400 tokens, and profiled with 1 image items of the maximum feature size.\n",
            "The image processor of type `Qwen2VLImageProcessor` is now loaded as a fast processor by default, even if the model checkpoint was saved with a slow processor. This is a breaking change and may produce slightly different outputs. To continue using the slow processor, instantiate this class with `use_fast=False`. Note that this behavior will be extended to all models in a future release.\n",
            "You have video processor config saved in `preprocessor.json` file which is deprecated. Video processor configs should be saved in their own `video_preprocessor.json` file. You can rename the file or load and save the processor back which renames it automatically. Loading from `preprocessor.json` will be removed in v5.0.\n",
            "INFO 08-07 20:58:49 [backends.py:530] Using cache directory: /root/.cache/vllm/torch_compile_cache/f40f68567f/rank_0_0/backbone for vLLM's torch.compile\n",
            "INFO 08-07 20:58:49 [backends.py:541] Dynamo bytecode transform time: 8.76 s\n",
            "INFO 08-07 20:58:56 [backends.py:161] Directly load the compiled graph(s) for dynamic shape from the cache, took 6.316 s\n",
            "INFO 08-07 20:58:56 [monitor.py:34] torch.compile takes 8.76 s in total\n",
            "INFO 08-07 20:58:58 [gpu_worker.py:255] Available KV cache memory: 12.20 GiB\n",
            "INFO 08-07 20:58:58 [kv_cache_utils.py:833] GPU KV cache size: 456,816 tokens\n",
            "INFO 08-07 20:58:58 [kv_cache_utils.py:837] Maximum concurrency for 131,072 tokens per request: 3.49x\n",
            "Capturing CUDA graph shapes: 100% 67/67 [00:02<00:00, 24.17it/s]\n",
            "INFO 08-07 20:59:01 [gpu_model_runner.py:2485] Graph capturing finished in 3 secs, took 0.44 GiB\n",
            "INFO 08-07 20:59:01 [core.py:193] init engine (profile, create kv cache, warmup model) took 42.75 seconds\n",
            "INFO 08-07 20:59:02 [loggers.py:141] Engine 000: vllm cache_config_info with initialization after num_gpu_blocks is: 28551\n",
            "INFO 08-07 20:59:02 [api_server.py:1818] Starting vLLM API server 0 on http://0.0.0.0:8000\n",
            "INFO 08-07 20:59:02 [launcher.py:29] Available routes are:\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /openapi.json, Methods: HEAD, GET\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /docs, Methods: HEAD, GET\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /docs/oauth2-redirect, Methods: HEAD, GET\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /redoc, Methods: HEAD, GET\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /health, Methods: GET\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /load, Methods: GET\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /ping, Methods: POST\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /ping, Methods: GET\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /tokenize, Methods: POST\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /detokenize, Methods: POST\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/models, Methods: GET\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /version, Methods: GET\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/responses, Methods: POST\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/responses/{response_id}, Methods: GET\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/responses/{response_id}/cancel, Methods: POST\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/chat/completions, Methods: POST\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/completions, Methods: POST\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/embeddings, Methods: POST\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /pooling, Methods: POST\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /classify, Methods: POST\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /score, Methods: POST\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/score, Methods: POST\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/audio/transcriptions, Methods: POST\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/audio/translations, Methods: POST\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /rerank, Methods: POST\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/rerank, Methods: POST\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /v2/rerank, Methods: POST\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /scale_elastic_ep, Methods: POST\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /is_scaling_elastic_ep, Methods: POST\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /invocations, Methods: POST\n",
            "INFO 08-07 20:59:02 [launcher.py:37] Route: /metrics, Methods: GET\n",
            "\u001b[32mINFO\u001b[0m:     Started server process [\u001b[36m10516\u001b[0m]\n",
            "\u001b[32mINFO\u001b[0m:     Waiting for application startup.\n",
            "\u001b[32mINFO\u001b[0m:     Application startup complete.\n",
            "\u001b[32mINFO\u001b[0m:     2001:818:c61b:b000:457e:d747:75e4:7263:0 - \"\u001b[1mGET / HTTP/1.1\u001b[0m\" \u001b[31m404 Not Found\u001b[0m\n",
            "\u001b[32mINFO\u001b[0m:     2001:818:c61b:b000:457e:d747:75e4:7263:0 - \"\u001b[1mGET /favicon.ico HTTP/1.1\u001b[0m\" \u001b[31m404 Not Found\u001b[0m\n",
            "INFO 08-07 21:00:28 [launcher.py:80] Shutting down FastAPI HTTP server.\n",
            "[rank0]:[W807 21:00:29.608158947 ProcessGroupNCCL.cpp:1479] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())\n",
            "\u001b[32mINFO\u001b[0m:     Shutting down\n",
            "\u001b[31mERROR\u001b[0m:    Traceback (most recent call last):\n",
            "  File \"/usr/local/lib/python3.11/dist-packages/starlette/routing.py\", line 701, in lifespan\n",
            "    await receive()\n",
            "  File \"/usr/local/lib/python3.11/dist-packages/uvicorn/lifespan/on.py\", line 137, in receive\n",
            "    return await self.receive_queue.get()\n",
            "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
            "  File \"/usr/lib/python3.11/asyncio/queues.py\", line 158, in get\n",
            "    await getter\n",
            "asyncio.exceptions.CancelledError\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "mbOe-1sxU1-r"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}
